/*
Code generates the following Figures: 
-	Figure 5: Predicted vs. Observed Trajectories
-	Figure 6: Predicted vs. Observed heterogeneity: Preference and Belief Shifters
-	Figure A.4: Implied vs. Observed heterogeneity in marriage by shifters of preferences and beliefs
-	Figure A.5: Formal comparison between model predictions and observed patterns
-	Figure A.6: Formal comparison between model predictions and observed patterns for schooling by shifters of preferences and beliefs
-	Figure A.7: Formal comparison between model predictions and observed patterns for marriage by shifters of preferences and beliefs

Organised as two parts: 
- 	Part 1: unconditional trajectories 
- 	Part 2: heterogeneity in trajectories by gender norms and liking school
*/ 

* Load user-written programs which create average trajectories given model estimates
*-------------------------------------------------------------------------------
do "$path/3_validation_and_counterfactuals/programs"

set scheme lean2

* PART 1: UNCONDITIONAL TRAJECTORIES
********************************************************************************
********************************************************************************

/* Create and then collapse (across different bootstraps) model-implied trajectories
*-------------------------------------------------------------------------------
* UNCOMMENT TO GENERATE FILE FROM SCRATCH
*
quietly {
tempfile temp
local b=0
simulate_trajectories, bs("_bs`b'")
collapse_trajectories
gen bs=`b'
save `temp', replace	
	
foreach b of numlist 1/100 {
	simulate_trajectories, bs("_bs`b'")
	collapse_trajectories
	gen bs=`b'
	append using `temp'
	save `temp', replace
	}
}
	
save "$data/created_data/simulate_trajectories_collpased_bootrapped", replace

*/

* Create model-implied proportion of girls married and in school at each age (and standard errors)
*-------------------------------------------------------------------------------

use "$data/created_data/simulate_trajectories_collpased_bootrapped", clear
keep unmarried_end mar_end inschool_end age bs

preserve
keep if bs==0
tempfile temp
save `temp'
restore

drop if bs==0
collapse (sd) se_unmarried_end=unmarried_end se_mar_end=mar_end se_inschool_end=inschool_end, by(age)

merge 1:1 age using `temp', nogen

* Merge with empirical analogue and create differences (and standard errors)
*-------------------------------------------------------------------------------

merge 1:1 age using  "$data/created_data/obs_traj", nogen
rename married_obs mar_obs
rename married_obs_se mar_obs_se
	
foreach x in mar inschool { 
	gen `x'_difference=`x'_end-`x'_obs
	gen `x'_se_difference=(se_`x'_end^2+`x'_obs_se^2)^0.5
	gen `x'_p_difference=2 * normal(-abs(`x'_difference/`x'_se_difference))
	}
	
keep age  ///
	mar_end se_mar_end mar_obs mar_obs_se mar_difference mar_se_difference mar_p_difference ///
	inschool_end se_inschool_end inschool_obs inschool_obs_se  inschool_difference inschool_se_difference inschool_p_difference	
order age  ///
	mar_end se_mar_end mar_obs mar_obs_se mar_difference mar_se_difference mar_p_difference ///
	inschool_end se_inschool_end inschool_obs inschool_obs_se  inschool_difference inschool_se_difference inschool_p_difference

save "$output/sim_obs_comp", replace

* figures quoted in text: 
sum inschool_end inschool_obs inschool_p_difference if age==18
sum mar_end mar_obs mar_p_difference if age==18

* draw main trajectory graph (Figure 5)
*-------------------------------------------------------------------------------

preserve
replace inschool_end=. if age>18
replace inschool_obs=. if age>18
gen unmarried_end=1-mar_end
gen unmarried_obs=1-mar_obs
gen one=1

* model prediction
twoway  (area one unmarried_end inschool_end   age if age>12 & age<=22, fcolor(gs14 gs8 gs1)  )  , xtitle("Age") ytitle("Fraction") legend(order(1 "Married" 2 "Unmarried, Out of School" 3 "In School")) graphregion(color(white)) xlabel(13(1)22) ylabel(0(0.2)1) name(model_implied, replace) title("Model Predictions") 

* observed
twoway  (area one unmarried_obs inschool_obs   age if age>12 & age<=22, fcolor(gs14 gs8 gs1)  )  , xtitle("Age") ytitle("Fraction") legend(order(1 "Married" 2 "Unmarried, Out of School" 3 "In School")) graphregion(color(white)) xlabel(13(1)22) ylabel(0(0.2)1) name(observed, replace) title("Observed") 


grc1leg model_implied observed, name(v2, replace)
graph export "$output/model_vs_observed_trajectories.png", width(4000) replace
restore


* draw goodness of fit graph (Figure A5)
*-------------------------------------------------------------------------------

* marriage graph (with CIs)
gen mar_ciup_diff=mar_difference+1.64*mar_se_difference
gen mar_cilo_diff=mar_difference-1.64*mar_se_difference

graph twoway (line mar_end age, lcolor(black)) (line mar_obs age, lcolor(black) lpattern(dash))   (rarea mar_ciup_diff mar_cilo_diff age, color(gs10%40)) (line mar_difference age, lcolor(gs4) lpattern(dot)), xlabel(13(1)22) ytitle("Fraction Married") legend(order(1 "Model prediction" 2 "Observed" 4 "Difference" 3 "90% CI")) name(g_mar, replace)

* school graph (with CIs)
gen inschool_ciup_diff=inschool_difference+1.64*inschool_se_difference
gen inschool_cilo_diff=inschool_difference-1.64*inschool_se_difference

keep if age<19
graph twoway (line inschool_end age, lcolor(black)) (line inschool_obs age, lcolor(black) lpattern(dash))   (rarea inschool_ciup_diff inschool_cilo_diff age, color(gs10%40)) (line inschool_difference age, lcolor(gs4) lpattern(dot)), xlabel(13(1)18) ytitle("Fraction in School") legend(order(1 "Model prediction" 2 "Observed" 4 "Difference" 3 "90% CI")) name(g_sch, replace)

grc1leg g_sch g_mar, ycommon name(overall_comb, replace)
graph export "$output/overall_fit.png", width(2000) replace


* PART 2: HETEROGENEITY IN TRAJECTORIES
********************************************************************************
********************************************************************************


/* Create and then collapse (across different bootstraps) model-implied trajectories by shifters of preferences and beliefs
*-------------------------------------------------------------------------------
* UNCOMMENT TO GENERATE FILE FROM SCRATCH

* generate trajectories

quietly {
* goodgirl
tempfile temp
local b=0
simulate_trajectories, bs("_bs`b'")
collapse_trajectories, by("goodgirl")
gen bs=`b'
save `temp', replace	
	
foreach b of numlist 1/100 {
	simulate_trajectories, bs("_bs`b'")
	collapse_trajectories, by("goodgirl")
	gen bs=`b'
	append using `temp'
	save `temp', replace
	}
save "$data/created_data/simulate_trajectories_collapsed_bootrapped_goodgirl", replace

* likeschool
tempfile temp
local b=0
simulate_trajectories, bs("_bs`b'")
collapse_trajectories, by("likeschool")
gen bs=`b'
save `temp', replace	
	
foreach b of numlist 1/100 {
	simulate_trajectories, bs("_bs`b'")
	collapse_trajectories, by("likeschool")
	gen bs=`b'
	append using `temp'
	save `temp', replace
	}
save "$data/created_data/simulate_trajectories_collapsed_bootrapped_likeschool", replace

}
*/

* heterogeneity in model implied trajectories by whether or not girl complies with gender norms (=goodgirl)
*-------------------------------------------------------------------------------
use "$data/created_data/simulate_trajectories_collapsed_bootrapped_goodgirl", clear

gen i=bs*1000+age
keep age i goodgirl mar_end inschool_end bs
reshape wide mar_end inschool_end , i(i) j(goodgirl)

gen diff_mar=mar_end1-mar_end0
gen diff_sch=inschool_end1-inschool_end0

preserve
keep if bs==0
tempfile temp
save `temp'
restore

drop if bs==0
collapse  (sd)  se_diff_mar=diff_mar se_diff_sch=diff_sch, by(age)

merge 1:1 age  using `temp', nogen

keep age inschool_end0	mar_end0	inschool_end1	mar_end1 diff_mar	diff_sch se_diff_mar	se_diff_sch

tempfile model
save `model'

* heterogeneity in observed trajectories by whether or not girl complies with gender norms (=goodgirl)
*-------------------------------------------------------------------------------
use "$data/created_data/obs_traj_parents_worried", clear

keep age goodgirl married_obs inschool_obs married_obs_se inschool_obs_se 
reshape wide married_obs inschool_obs married_obs_se inschool_obs_se , i(age) j(goodgirl)

gen diff_mar_obs=married_obs1-married_obs0
gen diff_mar_obs_se=(married_obs_se1^2+married_obs_se0^2)^0.5

gen diff_sch_obs=inschool_obs1-inschool_obs0
gen diff_sch_obs_se=(inschool_obs_se1^2+inschool_obs_se0^2)^0.5

merge 1:1 age using `model'

* Figure 6b: Heterogeneity by whether or not parents are worried that daughters behavior conflicts with gender norms (gets home late and has unsuitable friends)
*-------------------------------------------------------------------------------
preserve
keep if age<19
graph twoway  (line inschool_end1  age, lcolor(black))  (line inschool_end0  age, lcolor(black)  lpattern(dash)) , xlabel(13(1)18) ytitle("Fraction in school") legend(order(1 "Follows norms"  2 "Breaks norms"  ) pos(6))  name(sch_dgood_model, replace) xtitle("Age")  title("Model Predictions") ylabel(0(0.2)1)
graph twoway  (line inschool_obs1  age, lcolor(black))  (line inschool_obs0  age, lcolor(black)  lpattern(dash)) ,xlabel(13(1)18) ytitle("Fraction in school") legend(order(1 "Follows norms"  2 "Breaks norms"  ) pos(6))  name(sch_dgood_obs, replace) xtitle("Age")  title("Observed Data") ylabel(0(0.2)1)

grc1leg sch_dgood_model sch_dgood_obs, ycommon
graph export "$output/patterns_by_norms_sch.png", replace width(3000)
restore

* figures in text
sum diff_sch_obs diff_sch if age<19

* Figure A4b: Heterogeneity by whether or not parents are worried that daughters behavior conflicts with gender norms (gets home late and has unsuitable friends)
*-------------------------------------------------------------------------------
graph twoway  (line mar_end1  age, lcolor(black))  (line mar_end0  age, lcolor(black)  lpattern(dash)) , xlabel(13(1)22) ytitle("Fraction Married") legend(order(1 "Follows norms"  2 "Breaks norms"  ) pos(6))  name(mar_dgood_model, replace) xtitle("Age")  title("Model Predictions")
graph twoway  (line married_obs1  age, lcolor(black))  (line married_obs0  age, lcolor(black)  lpattern(dash)) , xlabel(13(1)22) ytitle("Fraction Married") legend(order(1 "Follows norms"  2 "Breaks norms"  ) pos(6))  name(mar_dgood_obs, replace) xtitle("Age")  title("Observed Data")

grc1leg mar_dgood_model mar_dgood_obs, ycommon
graph export "$output/patterns_by_norms_mar.png", replace width(3000)



* Figure A.7: Formal comparison between model predictions and observed patterns for marriage by shifters of preferences and beliefs (draw second graph)
*-------------------------------------------------------------------------------
* calc diff in diff and SEs
gen diff_diff_mar=diff_mar-diff_mar_obs
gen se_diff_diff_mar=(se_diff_mar^2+diff_mar_obs_se^2)^0.5

gen diff_diff_sch=diff_sch-diff_sch_obs
gen se_diff_diff_sch=(se_diff_sch^2+diff_sch_obs_se^2)^0.5

gen ciup_diff_diff_mar=diff_diff_mar+1.64*se_diff_diff_mar
gen cilo_diff_diff_mar=diff_diff_mar-1.64*se_diff_diff_mar

graph twoway (line married_obs0  age, lcolor(black)) (line married_obs1 age, lcolor(red) lpattern(solid)) (line mar_end0  age, lcolor(black) lpattern(dash)) (line mar_end1 age, lcolor(red) lpattern(dash))   (rarea ciup_diff_diff_mar cilo_diff_diff_mar age, color(gs10%40)) (line diff_diff_mar age, lcolor(gs4) lpattern(dot)), xlabel(13(1)22) ytitle("Fraction Married") legend(order(1 "Model prediction - Breaks norms" 3 "Observed - Breaks norms" 2 "Model prediction - Doesn't break norms"  4 "Observed - Doesn't break norms" 5 "Difference in Difference" 6 "90% CI") pos(6))  name(g2_mar_dgood, replace) xtitle("Age")  title("By whether daughter breaks norms")

* Figure A.6: Formal comparison between model predictions and observed patterns for marriage by shifters of preferences and beliefs (draw second graph)
*-------------------------------------------------------------------------------
preserve
keep if age<19
gen ciup_diff_diff_sch=diff_diff_sch+1.64*se_diff_diff_sch
gen cilo_diff_diff_sch=diff_diff_sch-1.64*se_diff_diff_sch

graph twoway (line inschool_obs0  age, lcolor(black)) (line inschool_obs1 age, lcolor(red) lpattern(solid)) (line inschool_end0  age, lcolor(black) lpattern(dash)) (line inschool_end1 age, lcolor(red) lpattern(dash))   (rarea ciup_diff_diff_sch cilo_diff_diff_sch age, color(gs10%40)) (line diff_diff_sch age, lcolor(gs4) lpattern(dot)), xlabel(13(1)18) ytitle("Fraction in School") legend(order(1 "Model prediction - Breaks norms" 3 "Observed - Breaks norms" 2 "Model prediction - Doesn't break norms"  4 "Observed - Doesn't break norms" 5 "Difference in Difference" 6 "90% CI") pos(6)) name(g2_sch_dgood, replace) xtitle("Age") title("By whether daughter breaks norms")
restore


* heterogeneity in model implied trajectories by whether or not girl likes school 
*-------------------------------------------------------------------------------
use  "$data/created_data/simulate_trajectories_collapsed_bootrapped_likeschool", clear

gen i=bs*1000+age
keep age i likeschool mar_end inschool_end bs
reshape wide mar_end inschool_end , i(i) j(likeschool)

gen diff_mar=mar_end1-mar_end0
gen diff_sch=inschool_end1-inschool_end0

preserve
keep if bs==0
tempfile temp
save `temp'
restore

drop if bs==0
collapse  (sd)  se_diff_mar=diff_mar se_diff_sch=diff_sch, by(age)

merge 1:1 age  using `temp', nogen

keep age inschool_end0	mar_end0	inschool_end1	mar_end1 diff_mar	diff_sch se_diff_mar	se_diff_sch

tempfile model
save `model'

* heterogeneity in observed trajectories by whether or not girl is hard working
*-------------------------------------------------------------------------------

use "$data/created_data/obs_traj_hard_working", clear

keep age likeschool married_obs inschool_obs married_obs_se inschool_obs_se 
reshape wide married_obs inschool_obs married_obs_se inschool_obs_se , i(age) j(likeschool)

gen diff_mar_obs=married_obs1-married_obs0
gen diff_mar_obs_se=(married_obs_se1^2+married_obs_se0^2)^0.5

gen diff_sch_obs=inschool_obs1-inschool_obs0
gen diff_sch_obs_se=(inschool_obs_se1^2+inschool_obs_se0^2)^0.5

merge 1:1 age using `model'

* Figure 6a: Heterogeneity by whether or not girl likes school (in the choice experiments) and whether or not mother reported girl is hardworking (observational data)
*-------------------------------------------------------------------------------
preserve
keep if age<19
graph twoway  (line inschool_end1  age, lcolor(black))  (line inschool_end0  age, lcolor(black)  lpattern(dash)) , xlabel(13(1)18) ytitle("Fraction in school") legend(order(1 "Likes school"  2 "Doesn't like school" ) pos(6))  name(sch_dlike_model, replace) xtitle("Age")  title("Model Predictions") ylabel(0(0.2)1)
graph twoway  (line inschool_obs1  age, lcolor(black))  (line inschool_obs0  age, lcolor(black)  lpattern(dash)) ,xlabel(13(1)18) ytitle("Fraction in school") legend(order(1 1 "Likes school"  2 "Doesn't like school"  ) pos(6))  name(sch_dlike_obs, replace) xtitle("Age")  title("Observed Data") ylabel(0(0.2)1)

grc1leg sch_dlike_model sch_dlike_obs
graph export "$output/patterns_by_like_sch.png", replace width(3000)
restore

* figures in text
sum diff_sch_obs diff_sch if age<19

* Figure A.4a: Heterogeneity by whether or not girl likes school (in the choice experiments) and whether or not mother reported girl is hardworking (observational data)
*-------------------------------------------------------------------------------

graph twoway  (line mar_end1  age, lcolor(black))  (line mar_end0  age, lcolor(black)  lpattern(dash)) , xlabel(13(1)22) ytitle("Fraction Married") legend(order(1 "Likes school"  2 "Doesn't like school"  ) pos(6))  name(mar_dlike_model, replace) xtitle("Age")  title("Model Predictions")
graph twoway  (line married_obs1  age, lcolor(black))  (line married_obs0  age, lcolor(black)  lpattern(dash)) , xlabel(13(1)22) ytitle("Fraction Married") legend(order(1 "Hardworking"  2 "Not Hardworking"  ) pos(6))  name(mar_dlike_obs, replace) xtitle("Age")  title("Observed Data")

graph combine mar_dlike_model mar_dlike_obs, ycommon
graph export "$output/patterns_by_like_mar.png", replace width(3000)


* Figure A.7: Formal comparison between model predictions and observed patterns for marriage by shifters of preferences and beliefs (draw first graph)
*-------------------------------------------------------------------------------
gen diff_diff_mar=diff_mar-diff_mar_obs
gen se_diff_diff_mar=(se_diff_mar^2+diff_mar_obs_se^2)^0.5

gen diff_diff_sch=diff_sch-diff_sch_obs
gen se_diff_diff_sch=(se_diff_sch^2+diff_sch_obs_se^2)^0.5

gen ciup_diff_diff_mar=diff_diff_mar+1.64*se_diff_diff_mar
gen cilo_diff_diff_mar=diff_diff_mar-1.64*se_diff_diff_mar

graph twoway (line married_obs0  age, lcolor(black)) (line married_obs1 age, lcolor(red) lpattern(solid)) (line mar_end0  age, lcolor(black) lpattern(dash)) (line mar_end1 age, lcolor(red) lpattern(dash))   (rarea ciup_diff_diff_mar cilo_diff_diff_mar age, color(gs10%40)) (line diff_diff_mar age, lcolor(gs4) lpattern(dot)), xlabel(13(1)22) ytitle("Fraction Married") legend(order(1 "Model prediction - Doesn't like school" 3 "Observed - Not Hardworking" 2 "Model prediction - Likes school"  4 "Observed - Hardworking" 5 "Difference in Difference" 6 "90% CI")  pos(6))  name(g2_mar_dlike, replace) xtitle("Age")  title("By whether daughter likes school")

* Figure A.6: Formal comparison between model predictions and observed patterns for marriage by shifters of preferences and beliefs (draw first graph)
*-------------------------------------------------------------------------------
preserve
keep if age<19
gen ciup_diff_diff_sch=diff_diff_sch+1.64*se_diff_diff_sch
gen cilo_diff_diff_sch=diff_diff_sch-1.64*se_diff_diff_sch

graph twoway (line inschool_obs0  age, lcolor(black)) (line inschool_obs1 age, lcolor(red) lpattern(solid)) (line inschool_end0  age, lcolor(black) lpattern(dash)) (line inschool_end1 age, lcolor(red) lpattern(dash))   (rarea ciup_diff_diff_sch cilo_diff_diff_sch age, color(gs10%40)) (line diff_diff_sch age, lcolor(gs4) lpattern(dot)), xlabel(13(1)18) ytitle("Fraction in School") legend(order(1 "Model prediction - Doesn't like school" 3 "Observed - Not Hardworking" 2 "Model prediction - Likes school"  4 "Observed - Hardworking" 5 "Difference in Difference" 6 "90% CI") pos(6) ) name(g2_sch_dlike, replace) xtitle("Age")  title("By whether daughter likes school")
restore

* Export Figures A.6 and A.7
*-------------------------------------------------------------------------------
graph combine  g2_sch_dlike g2_sch_dgood , ycommon name(overall_comb_d_sch, replace)
graph export "$output/fit_by_het_sch.png", width(2000) replace

graph combine  g2_mar_dlike g2_mar_dgood , ycommon name(overall_comb_d_mar, replace)
graph export "$output/fit_by_het_mar.png", width(2000) replace

